/* Listing output: 85-character line size, left-aligned instead of centered. */
options ls=85 nocenter;
/* Library that holds the input data sets read below (save.data11b, save.irates3). */
libname save '/bbkinghome/sschaner/Angrist Work/Web Papers/Angrist_2002/immig/1900/sasdata';
/* Fileref for the external program that the Moulton macros pull in via %include moulton3. */
filename moulton3 '/bbkinghome/sschaner/Angrist Work/Web Papers/AcemogluAngrist_2000/SAS programs/moulton3.sas';


/* table5w: first-stage for women (includes GQ obs, OCCSCORE=0 for institution)

   6-19-00 from firstwom2

   6-18-00 look at models with older*code

   female first stage and RF's: uses data94b

   revised 6-14-00 to use corrected income score imputation

   from firstwom1
   6-4-00 RETYPED

   1910,1920,1940 focus

   firstwom1 reads save.rd1900m6 or save.rd1900m8

   note: rd1900m6 uses mother, father-or-both CODES
         rd1900m8 uses father, mother-or-both CODES
         (use m6 for 40/60 follow-ups)
         data94b uses father, mother-or-both, codes mtongue hebrew

   note: 6-14-00 version of data94b omits hebrew

*/


* HERE ARE THE BITS OF CODE THAT DIFFER FOR MEN AND WOMEN;
%macro women1;
	/* DATA-step fragment used by makedata when building fb1 for the women run:
	   shift female ages up by two years, keep adjusted ages 20 through 50,
	   and flag the older group (adjusted age 36 or more). */
	if female eq 1 then age = age + 2;
	if 20 le age le 50;
	older = (age ge 36);
%mend women1;
%macro men1;
	/* DATA-step fragment used by makedata when building fb1 for the men run:
	   shift male ages down by two years, keep adjusted ages 18 through 48,
	   and flag the older group (adjusted age 34 or more). */
	if female eq 0 then age = age - 2;
	if 18 le age le 48;
	older = (age ge 34);
%mend men1;
	
%macro women2;
	/* DATA-step fragment for the micro sample (data two) in the women run:
	   keep females with year 91-94 and nativity 2-4, aged 18 through 48,
	   and flag the older group (age 34 or more). */
	if female eq 1 and (2 le nativity le 4) and (91 le year le 94);
	if 18 le age le 48;
	older = (age ge 34);
%mend women2;
%macro men2;
	/* DATA-step fragment for the micro sample (data two) in the men run:
	   keep year 91-94 and nativity 2-4, aged 20 through 50, and flag the
	   older group (age 36 or more).
	   Unlike women2 there is no sex restriction here; makedata applies
	   "if female=&val" later when it builds the per-gender data set. */
	if (2 le nativity le 4) and (91 le year le 94);
	if 20 le age le 50;
	older = (age ge 36);
%mend men2;
	
%macro women3;
	/* DATA-step fragment: single-year age dummies for ages 19 through 48
	   (age19-age48) plus their interactions with year=92 (i2age19-i2age48)
	   and year=94 (i4age19-i4age48).  The loop index j is left in the data set. */
	array ages(30)   age19-age48;
	array i2ages(30) i2age19-i2age48;
	array i4ages(30) i4age19-i4age48;
	do j = 1 to dim(ages);
		ages(j)   = (age eq (j + 18));
		i2ages(j) = (year eq 92) * ages(j);
		i4ages(j) = (year eq 94) * ages(j);
	end;
%mend women3;
%macro men3;
	/* DATA-step fragment: single-year age dummies for ages 21 through 50
	   (age21-age50) plus their interactions with year=92 (i2age21-i2age50)
	   and year=94 (i4age21-i4age50).  The loop index j is left in the data set. */
	array ages(30)   age21-age50;
	array i2ages(30) i2age21-i2age50;
	array i4ages(30) i4age21-i4age50;
	do j = 1 to dim(ages);
		ages(j)   = (age eq (j + 20));
		i2ages(j) = (year eq 92) * ages(j);
		i4ages(j) = (year eq 94) * ages(j);
	end;
%mend men3;
	
%macro women4;
	/* Emits a complete TITLE statement; invoke it on its own, not after another TITLE keyword. */
	title 'WOMEN';
%mend women4;
%macro men4;
	/* Emits a complete TITLE statement; invoke it on its own, not after another TITLE keyword. */
	title 'MEN';
%mend men4;
	
* CODE FOR DIFFERENT SAMPLES - FOREIGN STOCK OR FOREIGN BORN;
%macro c1;
		/* Sample filter 1: keep nativity=5 only (the same code that c2 labels fb), years 91-94. */
		if nativity eq 5 and (91 le year le 94);
%mend c1;
%macro c2;
		/* Sample filter 2: keep nativity 2 through 5, years 91-94, and create
		   the 0/1 indicator fb for nativity=5. */
		if (91 le year le 94) and (2 le nativity le 5);
		fb = (nativity eq 5);
%mend c2;
%macro d1;
	/* PROC SUMMARY body paired with sample filter c1: one output row per
	   year/code/older cell with the mean of female (pctfem) and the sum of
	   weights (nimmig), written to fb2. */
	by year code older;
	id cohort;
	var female;
	output out=fb2 mean=pctfem sumwgt=nimmig;
%mend d1;
%macro d2;
	/* PROC SUMMARY body paired with sample filter c2: one output row per
	   year/code/older cell, summarising female and fb, written to fb2.
	   NOTE(review): the OUTPUT statement names nimmig twice -- once as the
	   sum of fb and once as the sum of weights.  SAS is expected to warn
	   about the duplicate and keep only the first occurrence (the sum of fb);
	   confirm in the log which definition is intended.  The statement is
	   reproduced unchanged so results do not move. */
	by year code older;
	id cohort;
	var female fb;
	output out=fb2 mean=pctfem pctfb sum=nwomen nimmig sumwgt=nimmig;
%mend d2;
	
* START MACRO;

%macro makedata(gender,val,cval);
/* Builds the analysis data set for one gender and runs the GLM regressions.

   Parameters
     gender  prefix of the gender-specific macros invoked as gender.1 to
             gender.4 (women or men); also the name of the final data set,
             and gender.2 is the name of the first-stage output data set.
     val     value of female kept in the final data set (if female=val).
     cval    suffix selecting the sample filter macro c(cval) and the
             matching PROC SUMMARY body d(cval).

   Data sets written in WORK: fb1, fb2, nber, two, three, the data set named
   by gender, and the one named gender.2.  The final data set is left sorted
   by clusid.
*/
data fb1;                               ** potential MALE fb spouses; 
 set save.data11b;

 %c&cval;
 
 %&gender.1; /*INVOKE  SPECIFIC CODE*/

 /* cohort encodes year (1, 2, 3 for 91, 92, 94) plus older/10 */
 cohort=(1*(year=91))+(2*(year=92))+(3*(year=94));
 cohort=cohort+(older/10);
 
proc sort data=fb1;
 by year code older;

/* slwt-weighted cell statistics by year/code/older; output goes to fb2 */
proc summary data=fb1;
 weight slwt;
 %d&cval.;

proc print;
title 'first-stage sex ratios for 1910,1920,1940';

data nber;
	set save.irates3;
	/* rescale year so that, e.g., 1910 becomes 91, matching the codes used above */
	year=(year-1000)/10;
	ilnimmig= log(itotal);
	
proc sort data=nber;
	by year code;
	
proc print data=nber;
	by year code;
	
proc print data=nber;
	title 'nber data';
	
data fb2;
	merge fb2 nber;
	by year code;
	/* iratio2 is not referenced again in this file */
	if year=94 and older=0 then iratio2=1;
		else iratio2=iratio;
		
***********************;
*** micro data step ***;
***********************;

data two; 	** micro FEMALE sample for follow-ups;
set save.data11b;
	
	%&gender.2; /*EXECUTE GENDER SPECIFIC CODE*/
	
	cohort=(1*(year=91))+(2*(year=92))+(3*(year=94));
	cohort=cohort+(older/10);
	
	if cohort ne .;

proc sort data=two;
	by year code older;
	
data three;
	merge two fb2;
	by year code older;
	/* drop cell rows from fb2 that have no matching micro observation */
	if (age ne .);
	
	women=pctfem*nimmig; lnwom=log(women);
	men=(1-pctfem)*nimmig; lnmen=log(men);
	ratio=men/women;
	
	lnimmig=log(nimmig);
	lnratio=log(ratio);
	
	label ratio='men/women among fb';
	
	** outcomes **;
	
	married= 1 le marst le 2;
	evermar= marst<6;
		if evermar=1 then alone=(2 le marst le 4);
		spsepres= marst=1;
		inlf= (0 le occ1950 le 990);
		anykids= nchild>0;
		working= (10 le empstatd le 15);
		if year=92 then working=.;
		
		** relationship outcomes (m8 only) **;
		
		femhead= relateg=1;
		
	mominhh= (1 le momrule le 3);
	
	** earnings variables **;
	** coding that was below plus related is now done by rdm9c **;

	/* years 95-96 cannot occur after the 91-94 filters above, so both flags are 0 here */
	altdrop= ((code=0 or (9 le code le 10))*(95 le year le 96));
	altdrop2= (9 le code le 10)*(95 le year le 96);
	run;
	
	data &gender.;
	set three;
	if female=&val; 
	if relateg ne 13;
	
	** scaled earnings variables **;
	
	if marst=1 then spsinc=spscore; else spsinc=0;
	twoscor= occscore+spsinc;
	
	if spsinc>0 then lnspsinc= log(spsinc);
	if twoscor>0 then lntwo= log(twoscor);
	
	escale3= (1+nchild+(1 le sprule le 5))**.6;
	escale3b= (1+(.5*nchild)+(.9*(2 le sprule le 5)));
	
	famscr1= famscore/myfamsiz;
	famscr2= famscore/famsize;
	
	if famscore>0 then do;
		lnfscore= log(famscore);
		lnfscr1= log(famscr1);
		lnfscr2= log(famscr2);
	end;
	
	famscr3= twoscor/escale3;
	famscr3b= twoscor/escale3b;
	
	if twoscor>0 then lnfscr3= log(famscr3);
	
	label famscr1= 'family income per adult size'
		famscr2= 'family income per family member'
		famscr3= 'parents inc. per nuke fam. member';
		
	kid5wt= slwt*nchlt5;
	kidallwt= slwt*nchild;
	
	
	*** code dummies ***;
	
	/* NOTE(review): the age list is fixed at age19-age48, which matches women3;
	   men3 builds age21-age50, so in the men run age19/age20 are created but
	   never assigned and age49/age50 keep the default length -- confirm this is acceptable */
	length code1-code10 age19-age48 native2 native3 year2 year4 3;
	
	year2= year=92;
	year4= year=94;
	
	native2= nativity=2;
	native3= nativity=3;
	
	code1= code=1; code2= code=2;
	code3= code=3; code4= code=4;
	code5= code=5; code6= code=6;
	code7= code=7; code8= code=8;
	code9= code=9; code10= code=10;
	
%&gender.3; /*INVOKE GENDER SPECIFIC CODE*/
			
	/* cluster identifier: one value per year/code cell */
	clusid= year+(code/100);
	
	if year ne 94 then do;
		sps2nd= 2 le spsnativ le 4;
		spsnat= spsnativ=1;
		spsfb= spsnativ=5;
	end;
	
if older=0; /*ONLY KEEP YOUNG ONES THIS TIME*/
	run;
	
	
*********************************************;
*** ols, first-stage, 2sls	*********;
*********************************************;

/* NOTE(review): titled OLS, but the regressors are iratio and ilnimmig, the
   same variables PROC SYSLIN lists as instruments later in this file -- this
   looks like a reduced form; confirm the label */
proc glm data=&gender;
	%&gender.4;
	title2 'OLS: slwt-weighted, YOUNG';
	weight slwt;
	class year age code nativity cohort;
	model evermar married anykids mominhh famsize myfamsiz femhead inlf occscore lnoccwge spsinc 
		twoscor famscore famscr1 =
			nativity iratio ilnimmig year*age code/ solution;
	run;
	
/* p= names predicted values for the first two dependent variables only
   (ratio -> pratio, lnimmig -> pimmig).
   NOTE(review): twoscor famscore famscr1 are also listed as dependent
   variables here; possibly carried over from the model above -- confirm */
proc glm data=&gender;
	%&gender.4;
	title2 'FIRST STAGE: slwt-weighted, YOUNG';
	weight slwt;
	class year age code nativity cohort;
	model ratio lnimmig
		twoscor famscore famscr1 =
			nativity iratio ilnimmig year*age code/ solution;
		output out=&gender.2 p=pratio pimmig;
	run;
		
/* second stage done by hand: outcomes regressed on the fitted values from the step above */
proc glm data=&gender.2;
	%&gender.4;
	title2 '2SLS: slwt-weighted, YOUNG';
	weight slwt;
	class year age code nativity cohort;
	model evermar married anykids mominhh famsize myfamsiz femhead inlf
		occscore lnoccwge spsinc twoscor famscore famscr1 
		= nativity pratio pimmig year*age code / solution;
	run;
	
***************************************;
*** Moulton Corrections ***;
***************************************;
/* bigmoulton runs PROC SUMMARY with BY clusid on this data set, so sort it here */
proc sort data=&gender.;
	by clusid;
	run;
%mend makedata;	

%macro bigmoulton(gender,subset,reg);
/* Moulton correction factors for the OLS and 2SLS regressions.

   Parameters
     gender  prefix of the title macro invoked as gender.4 (women or men).
     subset  name of the data set to analyse.  It must already be sorted by
             clusid, because PROC SUMMARY below uses BY clusid.
     reg     name of a macro that expands to the list of control variables.

   Two nested macros are defined and then invoked for the outcome evermar;
   calls for the other outcomes are present but commented out.

   The included program moulton3 is not visible here.  It is driven by the
   macro variables step, k and randvar set before each include.
   NOTE(review): from the merges below it presumably reads test2 and writes
   a data set ds(step) carrying rho(step) and a variable dummy -- confirm
   against moulton3.sas.  k is presumably the number of regression
   parameters (59 controls + 2 regressors + intercept = 62) -- confirm.

   Fix: the title macro gender.4 already emits a complete TITLE statement,
   so it is now invoked on its own.  The earlier form prefixed it with a
   second TITLE keyword, which put the literal word into the title text.
*/
	%macro minimoulton1(dvar);
	/* OLS version: dvar is the outcome variable. */

%let depvar=&dvar;

/* NOTE(review): title2 says FIRST STAGE, but the model regresses the outcome
   on ratio and lnimmig -- confirm the label */
proc reg data=&subset.;
	%&gender.4;
	title2 'FIRST STAGE: slwt-weighted -- Compare this to GLM';
	weight slwt;
	model &depvar= ratio lnimmig %&reg.;
	output out=test2 r=v_ij;
	run;

/* rescale residuals by the square root of the weight before passing them on */
data test2;
	set test2;
	v_ij= v_ij*sqrt(slwt);
	
%let step=1;
%let k=62;
%let randvar= clusid;
%include moulton3;

proc datasets; delete test2; run;

/* residuals of ratio on the remaining regressors */
proc reg data=&subset.;
	weight slwt;
		model ratio= lnimmig %&reg.;
		output out=test2 r=v_ij;
		run;

data test2;
	set test2;
	v_ij= v_ij*sqrt(slwt);
	
%let step=2;
%let k=61;
%let randvar= clusid;
%include moulton3;
		
proc datasets;
	delete test2;

/* cluster sizes, then their mean (m) and variance (v) */
proc summary data=&subset.;
	by clusid;
	var &depvar; output out=sizeds n=size;
	
proc summary data=sizeds;
	var size;
	output out=sizeds mean=m var=v;
	
data calc;
	merge ds1 ds2 sizeds;
		drop dummy _type_;
	gfac=sqrt(1+(((v/m)+(m-1))*rho2*rho1));
	
proc print; title 'moulton correction factors - OLS';

/* same again for lnimmig; step 2 is reused, so ds2 is replaced */
proc reg data=&subset.;
		weight slwt;
			model lnimmig= ratio %&reg.;
			output out=test2 r=v_ij;
			
data test2;
	set test2;
	v_ij= v_ij*sqrt(slwt);
	
%let step=2;
%let k=61;
%let randvar= clusid;
%include moulton3;
		
proc datasets;
delete test2;

proc summary data=&subset.;
	by clusid;
	var &depvar;
	output out=sizeds n=size;
	
proc summary data=sizeds;
	var size;
		output out=sizeds mean=m var=v;
		
data calc;
	merge ds1 ds2 sizeds;
		drop dummy _type_;
	gfac=sqrt(1+(((v/m)+(m-1))*rho2*rho1));

proc print;
	title 'moulton correction factors - OLS';
	run;
	%mend minimoulton1;
	
***********************************************;
* MACRO FOR IVS;
***********************************************;
	
	%macro minimoulton2(dvar);
	/* 2SLS version: dvar is the outcome variable; iratio and ilnimmig are the instruments. */

%let depvar=&dvar;

proc syslin 2sls out=test2 data=&subset.;
	%&gender.4;
	title2 '2SLS: slwt-weighted -- Compare this to GLM';
	weight slwt;
	endogenous ratio lnimmig &depvar.;
	instruments iratio ilnimmig %&reg.;
	model &depvar= ratio lnimmig %&reg.;
	output r=v_ij;
	run;

data test2;
	set test2;
	v_ij= v_ij*sqrt(slwt);
	
%let step=1;
%let k=62;
%let randvar= clusid;
%include moulton3;

proc datasets; delete test2; run;

/* fitted values of each endogenous regressor from its first stage */
proc reg data=&subset. noprint;
	weight slwt;
		model ratio= iratio ilnimmig %&reg.;
		output out=test2a p=p_ratio;
		run;
proc reg data=&subset. noprint;
	weight slwt;
		model lnimmig= iratio ilnimmig %&reg.;
		output out=test2b p=p_immig;
		run;
/* one-to-one merge without BY: relies on test2a and test2b having the same row order */
data test2a;
	set test2a (keep=p_ratio);
data test2c;
	merge test2a test2b;
proc datasets;
	delete test2a test2b;
proc reg data=test2c noprint;
		weight slwt;
		model p_ratio= p_immig %&reg.;
		output out=test2 r=v_ij;

data test2;
	set test2;
	v_ij= v_ij*sqrt(slwt);
	
%let step=2;
%let k=61;
%let randvar= clusid;
%include moulton3;
		
proc datasets;
	delete test2;

proc reg data=test2c noprint;
	weight slwt;
	model p_immig= p_ratio %&reg.;
	output out=test2 r=v_ij;
data test2;
	set test2;
	v_ij= v_ij*sqrt(slwt);

%let step=3;
%let k=61;
%let randvar= clusid;
%include moulton3;
	
proc summary data=&subset.;
	by clusid;
	var &depvar; output out=sizeds n=size;
	
proc summary data=sizeds;
	var size;
	output out=sizeds mean=m var=v;
	
/* one factor per endogenous regressor: mocrat for ratio, mocimmig for lnimmig */
data calc;
	merge ds1 ds2 ds3 sizeds;
		drop dummy _type_;
	mocrat=sqrt(1+(((v/m)+(m-1))*rho2*rho1));
	mocimmig=sqrt(1+(((v/m)+(m-1))*rho3*rho1));
	
proc print; title 'moulton correction factors - 2SLS';
run;
	%mend minimoulton2;

	%minimoulton1(evermar);  /* %minimoulton1(married); %minimoulton1(anykids);
	%minimoulton1(mominhh);   %minimoulton1(famsize); %minimoulton1(myfamsiz);
	%minimoulton1(femhead);   %minimoulton1(inlf);    %minimoulton1(occscore);
	%minimoulton1(lnoccwge); %minimoulton1(spsinc);  %minimoulton1(twoscor);
	%minimoulton1(famscore);  %minimoulton1(famscr1); */

	%minimoulton2(evermar);   /* %minimoulton2(married); %minimoulton2(anykids);
	%minimoulton2(mominhh);   %minimoulton2(famsize); %minimoulton2(myfamsiz);
	%minimoulton2(femhead);   %minimoulton2(inlf);    %minimoulton2(occscore);
	%minimoulton2(lnoccwge); %minimoulton2(spsinc);  %minimoulton2(twoscor);
	%minimoulton2(famscore);  %minimoulton2(famscr1); */
	
	%mend bigmoulton;

* DIFFERENT REGRESSION LISTS!!!;
	* USE w WITH WOMEN, m WITH MEN;
%macro regsw;
	/* Control-variable list for the women run.  The expansion ends with its
	   own semicolon, so it must be the last item in the calling statement. */
	native2 native3
	year2 year4
	code1-code10
	age19-age33
	i2age19-i2age33
	i4age19-i4age33;
%mend regsw;
%macro regsm;
	/* Control-variable list for the men run.  The expansion ends with its
	   own semicolon, so it must be the last item in the calling statement. */
	native2 native3
	year2 year4
	code1-code10
	age21-age35
	i2age21-i2age35
	i4age21-i4age35;
%mend regsm;

/* Women (female=1), sample filter c1 / summary d1: nativity=5 only. */
%makedata(women,1,1);
%bigmoulton(women,women,regsw);

/* Women (female=1), sample filter c2 / summary d2: nativity 2 through 5. */
%makedata(women,1,2);
%bigmoulton(women,women,regsw);

/* Men (female=0), sample filter c1 / summary d1: nativity=5 only. */
%makedata(men,0,1);	
%bigmoulton(men,men,regsm);

/* Men (female=0), sample filter c2 / summary d2: nativity 2 through 5. */
%makedata(men,0,2);	
%bigmoulton(men,men,regsm);

